/*
 * Copyright (c) 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
 *	The President and Fellows of Harvard College.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <kern/mips/regdefs.h>
#include <mips/specialreg.h>

   .set noreorder

   /*
    * __start: kernel entry point for the boot CPU.
    *
    * In:   a0 = address of the boot argument string (on the boot stack)
    * Out:  does not return; calls kmain(bootstring copy), and panics
    *       if kmain ever comes back.
    * Uses: s0 = _end, which is also the address of the bootstring copy
    *       s7 = curthread (set to NULL here)
    */
   .text
   .globl __start
   .type __start,@function
   .ent __start
__start:

   /*
    * Stack frame. We save the return address register, even though
    * it contains nothing useful. This is for gdb's benefit when it
    * comes to disassembling. We also need 16 bytes for making a call,
    * and we have to align to an 8-byte (64-bit) boundary, so the
    * total frame size is 24.
    *
    * Note that the frame here must match the frame we set up below
    * when we switch off the bootup stack. Otherwise, gdb gets very
    * confused.
    */
   .frame sp, 24, $0	/* 24-byte sp-relative frame; return addr on stack */
   .mask 0x80000000, -4	/* register 31 (ra) saved at (sp+24)-4 */
   addiu sp, sp, -24
   sw ra, 20(sp)

   /*
    * The System/161 loader sets up a boot stack for the first
    * processor at the top of physical memory, and passes us a single
    * string argument. The string lives on the very top of the stack.
    * We get its address in a0.
    *
    * The kernel loads at virtual address 0x80000200, which is
    * physical address 0x00000200. The space immediately below this
    * is reserved for the exception vector code.
    *
    * The symbol _end is generated by the linker. It's the address of
    * the end of the kernel. It's not a variable; the *value* of the
    * _end symbol itself is this address. In C you'd use "&_end".
    *
    * We set up the memory map like this:
    *
    *         top of memory
    *                         free memory
    *         P + 0x1000
    *                         first thread's stack (1 page)
    *         P
    *                         wasted space (< 1 page)
    *                         copy of the boot string
    *         _end
    *                         kernel
    *         0x80000200
    *                         exception handlers
    *         0x80000000
    *
    * where P is the next whole page after copying the argument string.
    */

   la s0, _end		/* stash _end in a saved register */

   move a1, a0		/* move bootstring to the second argument */
   move a0, s0		/* make _end the first argument */
   jal strcpy		/* call strcpy(_end, bootstring) */
   nop			/* delay slot */

   move a0, s0		/* make _end the first argument again */
   jal strlen		/* call strlen(_end) */
   nop			/* delay slot */

   /*
    * Address arithmetic: use the non-trapping (addu/addiu) forms.
    * These are pointers, not signed integers, and the exception
    * handlers have not been copied into place yet, so an overflow
    * trap here could not be handled.
    */
   addu t0, s0, v0	/* add in the length of the string */
   addiu t0, t0, 1	/* and the null terminator */

   addiu t0, t0, 4095	/* round up to next page boundary */
   li   t1, 0xfffff000
   and  t0, t0, t1

   addiu t0, t0, 4096	/* add one page to hold the stack */

   move sp, t0		/* start the kernel stack for the first thread here */

   sw t0, firstfree	/* remember the first free page for later */

   /*
    * At this point, s0 contains the address of our copy of the boot
    * argument string, and no other registers contain anything
    * interesting (except the stack pointer).
    */

   /*
    * Now set up a stack frame on the real kernel stack: a dummy saved
    * return address and four argument slots for making function calls,
    * plus a wasted slot for alignment.
    *
    * (This needs to match the stack frame set up at the top of the
    * function, or the debugger gets confused.)
    */
   addiu sp, sp, -24
   sw $0, 20(sp)

   /*
    * Now, copy the exception handler code onto the first page of memory.
    * Each call is memmove(vector address, handler start, handler size);
    * the size is the difference of two addresses, so it is computed
    * with the non-trapping subu.
    */

   li a0, EXADDR_UTLB
   la a1, mips_utlb_handler
   la a2, mips_utlb_end
   subu a2, a2, a1		/* size = end - start */
   jal memmove
   nop				/* delay slot */

   li a0, EXADDR_GENERAL
   la a1, mips_general_handler
   la a2, mips_general_end
   subu a2, a2, a1		/* size = end - start */
   jal memmove
   nop				/* delay slot */

   /*
    * Flush the instruction cache to make sure the above changes show
    * through to instruction fetch.
    */
   jal mips_flushicache
   nop				/* delay slot */

   /*
    * Initialize the TLB.
    */
   jal tlb_reset
   nop				/* delay slot */

   /*
    * Load NULL into the register we use for curthread.
    */
   li s7, 0

   /*
    * Set up the status register.
    *
    * The MIPS has six hardware interrupt lines and two software interrupts.
    * These are individually maskable in the status register. However, we
    * don't use this feature (for simplicity) - we only use the master
    * interrupt enable/disable flag in bit 0. So enable all of those bits
    * now and forget about them.
    *
    * The BEV bit in the status register, if set, causes the processor to
    * jump to a different set of hardwired exception handling addresses.
    * This is so that the kernel's exception handling code can be loaded
    * into RAM and that the boot ROM's exception handling code can be in
    * ROM. This flag is normally set at boot time, and we need to be sure
    * to clear it.
    *
    * The KUo/IEo/KUp/IEp/KUc/IEc bits should all start at zero.
    *
    * We also want all the other random control bits (mostly for cache
    * stuff) set to zero.
    *
    * Thus, the actual value we write is CST_IRQMASK.
    */

   li  t0, CST_IRQMASK		/* get value */
   mtc0 t0, c0_status		/* set status register */

   /*
    * Load the CPU number into the PTBASE field of the CONTEXT
    * register. This is necessary to read from cpustacks[] and
    * cputhreads[] on trap entry from user mode. See further
    * discussions elsewhere.
    *
    * Because the boot CPU is CPU 0, we can just send 0.
    */
   mtc0 $0, c0_context

   /*
    * Load the GP register. This is a MIPS ABI feature; the GP
    * register points to an address in the middle of the data segment,
    * so data can be accessed relative to GP using one instruction
    * instead of the two it takes to set up a full 32-bit address.
    */
   la gp, _gp

   /*
    * We're all set up!
    * Fetch the copy of the bootstring as the argument, and call main.
    */
   jal kmain
   move a0, s0			/* in delay slot */


   /*
    * kmain shouldn't return. panic.
    * Loop back just in case panic returns too.
    */
1:
   la  a0, panicstr
   jal panic
   nop				/* delay slot */
   j 1b
   nop				/* delay slot */
   .end __start

   /*
    * Message passed to panic() by __start if kmain ever returns.
    * .asciz appends the terminating NUL byte.
    */
   .rdata
panicstr:
   .asciz "kmain returned\n"

   /*
    * CPUs started after the boot CPU come here.
    *
    * In:   a0 = (software) cpu number
    * Out:  does not return; jumps to cpu_hatch(cpu number).
    * Uses: s0 = cpu number, kept in a callee-saved register so that it
    *            survives the calls made below
    *       s7 = curthread, loaded from cputhreads[]
    */
   .text
   .globl cpu_start_secondary
   .type cpu_start_secondary,@function
   .ent cpu_start_secondary
cpu_start_secondary:

   /*
    * When we get here our stack points to the CRAM area of the bus
    * controller per-CPU space. This means we can, with a bit of
    * caution, call C functions, but nothing very deeply nesting.
    * However, we don't need to.
    *
    * The a0 register contains the value that was put in the second
    * word of the CRAM area, which is the (software) cpu number for
    * indexing cpustacks[]. None of the other registers contain
    * anything useful.
    */


   /*
    * Stack frame. We save the return address register, even though
    * it contains nothing useful. This is for gdb's benefit when it
    * comes to disassembling. We also need 16 bytes for making a call,
    * and 4 bytes for alignment, so the total frame size is 24.
    *
    * Note that the frame here must match the frame we set up below
    * when we switch stacks. Otherwise, gdb gets very confused.
    */
   .frame sp, 24, $0	/* 24-byte sp-relative frame; return addr on stack */
   .mask 0x80000000, -4	/* register 31 (ra) saved at (sp+24)-4 */
   addiu sp, sp, -24
   sw ra, 20(sp)

   /*
    * Argument registers are not preserved across calls, and we still
    * need the cpu number after calling tlb_reset. Keep it in s0, the
    * same way __start keeps _end in s0 across its calls.
    *
    * cpustacks[] and cputhreads[] are both indexed by cpu number with
    * 4-byte entries, so the byte offset is computed once.
    */
   move s0, a0			/* s0 = cpu number */
   sll t1, s0, 2		/* t1 = byte index for arrays (multiply by 4) */

   /*
    * Fetch the stack out of cpustacks[].
    */
   lui t0, %hi(cpustacks)	/* load upper half of cpustacks base addr */
   addu t0, t0, t1		/* add in the byte index */
   lw sp, %lo(cpustacks)(t0)	/* get the stack pointer */

   /*
    * Now fetch curthread out of cputhreads[].
    */
   lui t0, %hi(cputhreads)	/* load upper half of cputhreads base addr */
   addu t0, t0, t1		/* add in the byte index */
   lw s7, %lo(cputhreads)(t0)	/* load curthread register */

   /*
    * Set up a stack frame on the new stack before making any calls
    * on it: 16 bytes for making a call, the return address slot, and
    * 4 bytes for alignment. Store zero into the return address slot
    * so we show as the top of the stack.
    *
    * (sp was loaded several instructions ago, so it is safe to use
    * here without a load delay nop.)
    */
   addiu sp, sp, -24
   sw z0, 20(sp)

   /*
    * Initialize the TLB.
    */
   jal tlb_reset
   nop				/* delay slot */

   /*
    * Set up the status register, as described in __start.
    */
   li  t0, CST_IRQMASK		/* get value */
   mtc0 t0, c0_status		/* set status register */

   /*
    * Load the CPU number into the PTBASE field of the CONTEXT
    * register, as described in __start. Use the saved copy in s0;
    * a0 may have been changed by the call above.
    */
   sll v0, s0, CTX_PTBASESHIFT
   mtc0 v0, c0_context

   /*
    * Initialize the on-chip timer interrupt.
    *
    * This should be set to CPU_FREQUENCY/HZ, but we don't have either
    * of those values here, so we'll arbitrarily set it to 100,000. It
    * will get reset to the right thing after it first fires.
    */
   li v0, 100000
   mtc0 v0, c0_compare


   /*
    * Load the GP register.
    */
   la gp, _gp

   /*
    * Off to MI code. Pass the cpu number as the argument, reloaded
    * from s0 in the delay slot of the jump.
    */
   j cpu_hatch
   move a0, s0			/* in delay slot: a0 = cpu number */
   .end cpu_start_secondary
